Set the working directory to H:/Projects/11000/11187/TS/Task 3.
Load the Scott_County_CR78_trips_78.csv file
CR_78 <- read.csv("H:/Projects/11000/11187/TS/Task 3/CR 78/scott_county_CR78_trips.csv",stringsAsFactors = FALSE)%>%
mutate(Date=ymd_hms(start_date_central,tz="US/Central"))
Date in ISO8601 format; converting timezone from UTC to "US/Central".
Link_dist_78 <- readxl::read_xlsx('H:/Projects/11000/11187/TS/Task 3/CR 78/scott_county_CR78_trips.xlsx',sheet = 'LinkDistances')
CR_42 <- read.csv("H:/Projects/11000/11187/TS/Task 3/CR 42/scott_county_CR42_trips.csv",stringsAsFactors = FALSE)%>%
mutate(Date=ymd_hms(start_date_central,tz="US/Central"))
Date in ISO8601 format; converting timezone from UTC to "US/Central".
Link_dist_42 <- readxl::read_xlsx('H:/Projects/11000/11187/TS/Task 3/CR 42/scott_county_CR42_trips.xlsx',sheet = 'LinkDistances')
The dimensions of the CR_78 dataset are 2063 rows and 20 columns. The dimensions of the CR_42 dataset are 994 rows and 20 columns.
Two combinations of links were used for analysis. The segments were loaded from the File Geodatabase on the H Drive project folder, rfgb.
The segment for CR_78 included 18 links. The segment for CR_42 included 20 links.
Scott County
CR_78
The data was collected between 2015-08-31 and 2015-11-30.
tripflag_summary_42 %>%
mutate(in_model = Total_Dist<40)%>%
ggplot(aes(x=in_model))+
geom_bar(stat = 'count')+
labs(title='Trips less than 40 miles',x=element_blank(),y='Total Count')+
scale_y_continuous(breaks = seq(0,900,50))+
theme(plot.title = element_text(hjust = 0.5))

gg <- ggplot(tripflag_summary_42[tripflag_summary_42$Total_Dist<40,])+
geom_histogram(aes(x=Total_Dist),color='black',fill='white',binwidth = 2.5)+
labs(title="Total Trip Distance histogram plot",x="Total Distance (miles)", y = "Count")
ggplotly(gg)
Start Locations:
End Locations:
CR_42
The data was collected between 2015-08-31 and 2015-11-30.
tripflag_summary_78 %>%
mutate(in_model = Total_Dist<40)%>%
ggplot(aes(x=in_model))+
geom_bar(stat = 'count')+
labs(title='Trips less than 40 miles',x=element_blank(),y='Total Count')+
scale_y_continuous(breaks = seq(0,1500,50))+
theme(plot.title = element_text(hjust = 0.5))

gg <- ggplot(tripflag_summary_78[tripflag_summary_78$Total_Dist<40,])+
geom_histogram(aes(x=Total_Dist),color='black',fill='white',binwidth = 2.5)+
labs(title="Total Trip Distance histogram plot",x="Total Distance (miles)", y = "Count")
ggplotly(gg)
Start Locations:
End Locations:
---
title: "Trip Freq Analysis"
output: html_notebook
---

```{r,echo=FALSE,include=FALSE}
library(magrittr)
library(scales)
library(stringr)
library(forecast)
library(leaflet)
library(tidyverse)
library(reshape2)
library(plotly)
library(lubridate)
library(kableExtra)
library(DT)
library(rgdal)
options(knitr.table.format = "html") 
```


Set the knitr root directory (the working directory used by all later chunks) to H:/Projects/11000/11187/TS/Task 3/R.

```{r 'setup',include=FALSE}
# Attach knitr with library() so that a missing package stops the notebook
# immediately; require() would only return FALSE and let later calls fail.
library(knitr)
# Root directory used by knitr when evaluating the code chunks.
opts_knit$set(root.dir = "H:/Projects/11000/11187/TS/Task 3/R")
# Directory listing kept from the original; the chunk is include=FALSE, so the
# result is not shown in the rendered notebook.
dir()
```


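Load the CR 78 and CR 42 trip files (`scott_county_CR78_trips.csv`, `scott_county_CR42_trips.csv`) and the `LinkDistances` sheet of the matching Excel workbooks.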

```{r}


# ---- CR 78 ----
# Trip records; `Date` is `start_date_central` parsed by ymd_hms() with
# tz = "US/Central".
CR_78 <- read.csv(
  "H:/Projects/11000/11187/TS/Task 3/CR 78/scott_county_CR78_trips.csv",
  stringsAsFactors = FALSE
)
CR_78 <- mutate(CR_78, Date = ymd_hms(start_date_central, tz = "US/Central"))

# Link table read from the 'LinkDistances' sheet of the CR 78 workbook.
Link_dist_78 <- readxl::read_xlsx(
  'H:/Projects/11000/11187/TS/Task 3/CR 78/scott_county_CR78_trips.xlsx',
  sheet = 'LinkDistances'
)

# ---- CR 42 ----
# Same two inputs for the CR 42 corridor.
CR_42 <- read.csv(
  "H:/Projects/11000/11187/TS/Task 3/CR 42/scott_county_CR42_trips.csv",
  stringsAsFactors = FALSE
)
CR_42 <- mutate(CR_42, Date = ymd_hms(start_date_central, tz = "US/Central"))

Link_dist_42 <- readxl::read_xlsx(
  'H:/Projects/11000/11187/TS/Task 3/CR 42/scott_county_CR42_trips.xlsx',
  sheet = 'LinkDistances'
)

```

The dimensions of the CR_78 dataset are `r paste(dim(CR_78)[1],'rows and',dim(CR_78)[2],'columns')`.
The dimensions of the CR_42 dataset are `r paste(dim(CR_42)[1],'rows and',dim(CR_42)[2],'columns')`.


```{r,echo=FALSE,include=FALSE}

# Path of the file geodatabase that holds the analysis segments.
fgdb <- "H:/Projects/11000/11187/TS/Task 3/Scott County.gdb"

# One layer per corridor, read with readOGR() from that geodatabase.
CR_78_seg <- readOGR(layer = "CR_78_seg", dsn = fgdb)
CR_42_seg <- readOGR(layer = "CR_42_seg", dsn = fgdb)

```

Two combinations of links were used for analysis. The segments were loaded from the File Geodatabase on the H Drive project folder, `r fgdb`.

The segment for CR_78 included `r dim(CR_78_seg)[1]` links.
The segment for CR_42 included `r dim(CR_42_seg)[1]` links.

![CR_78 and CR_42 Analysis Links](Segment_Overview.png)

```{r, echo=FALSE,warning=FALSE,include=FALSE}
# Split each trip's `path` string on '#' into one column per piece (V1, V2, ...).
# 1000 columns are reserved per trip.
# NOTE(review): pieces beyond the 1000th would be dropped by separate() --
# confirm no path is that long.
path_col_names <- paste0('V', seq_len(1000))

CR_78_x <- CR_78 %>%
  select(-path, everything()) %>%  # move `path` to the last column
  separate(path, into = path_col_names, sep = '#', remove = FALSE,
           fill = 'right')         # shorter paths are padded with NA on the right

# Flag columns that are entirely NA, then report the position and name of the
# first such column. The chunk is include=FALSE, so these two values are only
# visible when the chunk is run interactively.
size <- vapply(CR_78_x, function(x) all(is.na(x)), logical(1))
min(which(size))
colnames(CR_78_x)[min(which(size))]

# CR_42 already carries `Date` from the loading chunk, so it is not recomputed.
CR_42_x <- CR_42 %>%
  select(-path, everything()) %>%
  separate(path, into = path_col_names, sep = '#', remove = FALSE,
           fill = 'right')

size <- vapply(CR_42_x, function(x) all(is.na(x)), logical(1))
min(which(size))
colnames(CR_42_x)[min(which(size))]
```

```{r,echo=FALSE,warning=FALSE}

# Reshape the wide per-trip path columns (V1, V2, ...) into one row per
# trip/link and attach the link table by the link key 'A+B'.
#
# The set of path columns is derived from the data (every V column that holds
# at least one value) instead of the previously hard-coded counts 297 and 270,
# so longer paths in refreshed data are not silently truncated.

# Trip attributes repeated on every trip/link row.
trip_id_vars <- c('trip_id',
                  'start_latitude',
                  'start_longitude',
                  'end_latitude',
                  'end_longitude',
                  'Date')

# Names given to the split path columns in the previous chunk.
path_col_names <- paste0('V', seq_len(1000))

# ---- CR 78 ----
col_78 <- colnames(CR_78_x) %in% path_col_names &
  !vapply(CR_78_x, function(x) all(is.na(x)), logical(1))

trip_components_78 <- melt(CR_78_x,
                           id.vars = trip_id_vars,
                           measure.vars = colnames(CR_78_x)[col_78],
                           value.name = 'A+B')

# merge() keeps only rows whose 'A+B' value is present in the link table.
trip_components_78 <- merge(trip_components_78, Link_dist_78, by = 'A+B') %>%
  arrange(trip_id)

###trips_78 can go through our segment
###trips_78 <- filter(trip_components_78,trip_components_78$`A+B` %in% CR_78_seg$Link)
####trip_ids <- trips_78[!duplicated(trips_78$trip_id),]

# ---- CR 42 ----
col_42 <- colnames(CR_42_x) %in% path_col_names &
  !vapply(CR_42_x, function(x) all(is.na(x)), logical(1))

trip_components_42 <- melt(CR_42_x,
                           id.vars = trip_id_vars,
                           measure.vars = colnames(CR_42_x)[col_42],
                           value.name = 'A+B')

trip_components_42 <- merge(trip_components_42, Link_dist_42, by = 'A+B') %>%
  arrange(trip_id)

# The wide tables are no longer needed after reshaping.
rm(CR_78_x, CR_42_x)

```

```{r,echo=FALSE,warning=FALSE}
# Columns copied from the raw trip tables onto each per-trip summary row.
trip_attr_cols <- c("trip_id", "Date", "start_latitude",
                    "start_longitude", "end_latitude",
                    "end_longitude")

# ---- CR 78: total distance per trip ----
# Sum DISTANCE over all link rows of a trip, longest trips first.
dist_by_trip_78 <- summarise(group_by(trip_components_78, trip_id),
                             Total_Dist = sum(DISTANCE))
dist_by_trip_78 <- arrange(dist_by_trip_78, desc(Total_Dist))

# Attach start time and start/end coordinates from the raw trip table.
tripflag_summary_78 <- merge(dist_by_trip_78,
                             CR_78[, trip_attr_cols],
                             by = 'trip_id')

###datatable(tripflag_summary_78,filter='top')

# ---- CR 42: total distance per trip ----
dist_by_trip_42 <- summarise(group_by(trip_components_42, trip_id),
                             Total_Dist = sum(DISTANCE))
dist_by_trip_42 <- arrange(dist_by_trip_42, desc(Total_Dist))

tripflag_summary_42 <- merge(dist_by_trip_42,
                             CR_42[, trip_attr_cols],
                             by = 'trip_id')

###datatable(tripflag_summary_42,filter='top')

```


```{r,echo=FALSE,warning=FALSE}
# ---- CR 78 ----
# Attach each trip's total distance to its link rows, then keep only rows on
# links of the CR 78 analysis segment that belong to trips shorter than 40.
trip_comp_merge_78 <- merge(trip_components_78, tripflag_summary_78,
                            by = 'trip_id', all.x = TRUE) %>%
  arrange(desc(Total_Dist)) %>%
  filter(`A+B` %in% CR_78_seg$Link, Total_Dist < 40)

# Per-link statistics of total trip distance, computed only over the trips
# that passed the Total_Dist < 40 filter above.
link_details_78 <- summarise(group_by(trip_comp_merge_78, `A+B`),
                             Mean = mean(Total_Dist),
                             Max = max(Total_Dist),
                             Min = min(Total_Dist),
                             Std = sd(Total_Dist))

###datatable(trips_78,filter='top')

# ---- CR 42 ----
# Same steps for the CR 42 analysis segment.
trip_comp_merge_42 <- merge(trip_components_42, tripflag_summary_42,
                            by = 'trip_id', all.x = TRUE) %>%
  arrange(desc(Total_Dist)) %>%
  filter(`A+B` %in% CR_42_seg$Link, Total_Dist < 40)

link_details_42 <- summarise(group_by(trip_comp_merge_42, `A+B`),
                             Mean = mean(Total_Dist),
                             Max = max(Total_Dist),
                             Min = min(Total_Dist),
                             Std = sd(Total_Dist))


###datatable(trips_42,filter='top')


```


## Scott County {.tabset .tabset-fade}

### CR_78

The data was collected between `r format(min(CR_78$Date),'%Y-%m-%d')` and `r format(max(CR_78$Date),'%Y-%m-%d')`.

```{r,echo=FALSE,fig.height=5}

# Join the per-link distance statistics onto the CR 78 segment geometry.
CR_78_plot <- merge(CR_78_seg, link_details_78, by.x = 'Link', by.y = 'A+B')

# Hover text: the AB value followed by the mean trip distance (2 decimals).
CR_78_plot$label <- paste(CR_78_plot$AB, 'Mean Dist:',
                          round(CR_78_plot$Mean, 2))

# Styling applied to a link while the pointer is over it.
hover_style_78 <- highlightOptions(color = "white", weight = 2,
                                   bringToFront = TRUE)

cr78_link_map <- leaflet(data = CR_78_plot, width = '100%')
cr78_link_map <- setView(cr78_link_map,
                         lng = -93.550825, lat = 44.771614, zoom = 13)
cr78_link_map <- addPolylines(cr78_link_map,
                              highlightOptions = hover_style_78,
                              label = ~label)
cr78_link_map <- addProviderTiles(cr78_link_map, providers$OpenStreetMap)
cr78_link_map

```

```{r}

# Bar chart for the CR_78 tab: number of CR 78 trips with a total distance
# below 40 (TRUE) versus the rest (FALSE).
# The chunk previously plotted tripflag_summary_42 (the CR 42 trips) here;
# the y-axis breaks follow the range the notebook uses for the CR 78 data.
tripflag_summary_78 %>%
  mutate(in_model = Total_Dist < 40) %>%
  ggplot(aes(x = in_model)) +
  geom_bar(stat = 'count') +
  labs(title = 'Trips less than 40 miles', x = element_blank(), y = 'Total Count') +
  scale_y_continuous(breaks = seq(0, 1500, 50)) +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
```

```{r}
# Histogram for the CR_78 tab: total trip distance of CR 78 trips below 40,
# in bins of width 2.5. The chunk previously drew the CR 42 trips
# (tripflag_summary_42) under this heading.
gg <- ggplot(filter(tripflag_summary_78, Total_Dist < 40)) +
  geom_histogram(aes(x = Total_Dist), color = 'black', fill = 'white',
                 binwidth = 2.5) +
  labs(title = "Total Trip Distance histogram plot",
       x = "Total Distance (miles)", y = "Count")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(gg)

```


Start Locations:
```{r,echo=FALSE,fig.height=5}
# Marker icon for trip start points (10 x 8).
icon_start <- makeIcon('icon_start.png', iconHeight = 8, iconWidth = 10)

# CR 78 trips with a total distance below 40; shared by the two maps below.
data <- filter(tripflag_summary_78, Total_Dist < 40)

# Reference rings: radii are 5, 10, 20 and 40 times 1609.34, labelled in miles.
ring_miles <- c(5, 10, 20, 40)
list_radius <- as.list(1609.34 * ring_miles)
list_label <- as.list(paste(ring_miles, 'miles'))
```

```{r,echo=FALSE,fig.height=5}

# Map of trip start points for the filtered trips in `data`, with the
# reference rings drawn around a fixed centre point.
start_map <- leaflet(data = data, width = '100%')
start_map <- setView(start_map, lng = -93.50508, lat = 44.753019, zoom = 10)
start_map <- addTiles(start_map)
start_map <- addMarkers(start_map, ~start_longitude, ~start_latitude,
                        icon = icon_start, label = ~trip_id)
start_map <- addCircles(start_map, lng = -93.50508, lat = 44.753019,
                        radius = list_radius,
                        fillColor = 'none', label = list_label)
start_map <- addProviderTiles(start_map, providers$OpenStreetMap)
start_map

```


End Locations:

```{r,echo=FALSE,fig.height=5}


# Marker icon for trip end points. `icon_end` was used below without ever
# being defined in this notebook, which stops the chunk with
# "object 'icon_end' not found".
# NOTE(review): the file name 'icon_end.png' is assumed by analogy with
# 'icon_start.png' -- confirm the actual icon file.
icon_end <- makeIcon('icon_end.png', iconWidth = 10, iconHeight = 8)

# Map of trip end points for the filtered trips in `data`, with the
# reference rings drawn around a fixed centre point.
leaflet(data = data, width = '100%') %>%
  setView(lng = -93.50508, lat = 44.753019, zoom = 10) %>%
  addTiles() %>%
  addMarkers(~end_longitude, ~end_latitude, icon = icon_end, label = ~trip_id) %>%
  addCircles(lng = -93.50508, lat = 44.753019, radius = list_radius,
             fillColor = 'none', label = list_label) %>%
  addProviderTiles(providers$OpenStreetMap)

```



### CR_42


The data was collected between `r format(min(CR_42$Date),'%Y-%m-%d')` and `r format(max(CR_42$Date),'%Y-%m-%d')`.

```{r,echo=FALSE,fig.height=5}

# Join the per-link distance statistics onto the CR 42 segment geometry.
CR_42_plot <- merge(CR_42_seg, link_details_42, by.x = 'Link', by.y = 'A+B')

# Hover text: the AB value followed by the mean trip distance (2 decimals).
CR_42_plot$label <- paste(CR_42_plot$AB, 'Mean Dist:',
                          round(CR_42_plot$Mean, 2))

# Styling applied to a link while the pointer is over it.
hover_style_42 <- highlightOptions(color = "white", weight = 2,
                                   bringToFront = TRUE)

cr42_link_map <- leaflet(data = CR_42_plot, width = '100%')
cr42_link_map <- setView(cr42_link_map,
                         lng = -93.550825, lat = 44.771614, zoom = 13)
cr42_link_map <- addPolylines(cr42_link_map,
                              highlightOptions = hover_style_42,
                              label = ~label)
cr42_link_map <- addProviderTiles(cr42_link_map, providers$OpenStreetMap)
cr42_link_map

```

```{r}

# Bar chart for the CR_42 tab: number of CR 42 trips with a total distance
# below 40 (TRUE) versus the rest (FALSE).
# The chunk previously plotted tripflag_summary_78 (the CR 78 trips) here;
# the y-axis breaks follow the range the notebook uses for the CR 42 data.
tripflag_summary_42 %>%
  mutate(in_model = Total_Dist < 40) %>%
  ggplot(aes(x = in_model)) +
  geom_bar(stat = 'count') +
  labs(title = 'Trips less than 40 miles', x = element_blank(), y = 'Total Count') +
  scale_y_continuous(breaks = seq(0, 900, 50)) +
  theme(plot.title = element_text(hjust = 0.5))  # centre the title
```


```{r}
# Histogram for the CR_42 tab: total trip distance of CR 42 trips below 40,
# in bins of width 2.5. The chunk previously drew the CR 78 trips
# (tripflag_summary_78) under this heading.
gg <- ggplot(filter(tripflag_summary_42, Total_Dist < 40)) +
  geom_histogram(aes(x = Total_Dist), color = 'black', fill = 'white',
                 binwidth = 2.5) +
  labs(title = "Total Trip Distance histogram plot",
       x = "Total Distance (miles)", y = "Count")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(gg)

```

Start Locations:
```{r,echo=FALSE,fig.height=5}
# Marker icon for trip start points (10 x 8).
icon_start <- makeIcon('icon_start.png', iconHeight = 8, iconWidth = 10)

# CR 42 trips with a total distance below 40; shared by the two maps below.
data <- filter(tripflag_summary_42, Total_Dist < 40)

# Reference rings: radii are 5, 10, 20 and 40 times 1609.34, labelled in miles.
ring_miles <- c(5, 10, 20, 40)
list_radius <- as.list(1609.34 * ring_miles)
list_label <- as.list(paste(ring_miles, 'miles'))
```

```{r,echo=FALSE,fig.height=5}

# Map of trip start points for the filtered trips in `data`, with the
# reference rings drawn around a fixed centre point.
start_map <- leaflet(data = data, width = '100%')
start_map <- setView(start_map, lng = -93.50508, lat = 44.753019, zoom = 10)
start_map <- addTiles(start_map)
start_map <- addMarkers(start_map, ~start_longitude, ~start_latitude,
                        icon = icon_start, label = ~trip_id)
start_map <- addCircles(start_map, lng = -93.50508, lat = 44.753019,
                        radius = list_radius,
                        fillColor = 'none', label = list_label)
start_map <- addProviderTiles(start_map, providers$OpenStreetMap)
start_map

```


End Locations:

```{r,echo=FALSE,fig.height=5}

# Marker icon for trip end points. `icon_end` was used below without ever
# being defined in this notebook, which stops the chunk with
# "object 'icon_end' not found".
# NOTE(review): the file name 'icon_end.png' is assumed by analogy with
# 'icon_start.png' -- confirm the actual icon file.
icon_end <- makeIcon('icon_end.png', iconWidth = 10, iconHeight = 8)

# Map of trip end points for the filtered trips in `data`, with the
# reference rings drawn around a fixed centre point.
leaflet(data = data, width = '100%') %>%
  setView(lng = -93.50508, lat = 44.753019, zoom = 10) %>%
  addTiles() %>%
  addMarkers(~end_longitude, ~end_latitude, icon = icon_end, label = ~trip_id) %>%
  addCircles(lng = -93.50508, lat = 44.753019, radius = list_radius,
             fillColor = 'none', label = list_label) %>%
  addProviderTiles(providers$OpenStreetMap)

```
